RQ1: What baseline factors predict participation in TEDS?

In this script, we use data collected at Y2 to predict attrition at most of the later time points.

Analyses run in docker container bignardig/tidyverse451:v7

Updated on 8-Jan-2026 to replace household variable with derived single parent variable.

Load Data

Code
# Load shared objects (df, rq1x, rq1y, labels, ...) and fix the RNG seed
source("0_load_data.R")
set.seed(1)

# One row per family (twin == 1 keeps the first twin only), restricted to
# families with acontact == 1 (1st Contact data present — confirm coding)
df_rq1 = df %>%
  filter(twin == 1, acontact == 1) %>%
  select(all_of(c(rq1x, rq1y, rq1y_twin1, rq1y_twin2, "cohort")))

# Baseline predictor columns only
df_rq1x = select(df_rq1, all_of(rq1x))

# Wide console so long imputation logs print on one line
options(width = 900)

At some time points, participants in cohorts 3 & 4 were not all eligible to take part

List of variables

Code
# Tabulate every analysis variable alongside its raw and cleaned label.
# NOTE(review): the clean_labels column reuses rq1y_twin_labels for twin
# outcomes (no *_clean version?) — confirm this is intentional.
all_vars         = c(rq1x, rq1y, rq1y_twin)
all_labels       = c(rq1x_labels, rq1y_labels, rq1y_twin_labels)
all_clean_labels = c(rq1x_labels_clean, rq1y_labels_clean, rq1y_twin_labels)

data.frame(var = all_vars, labels = all_labels, clean_labels = all_clean_labels)

Singly Impute Missing Predictor/Baseline Data

Imputation

Code
# Father-report variables with notable missingness, kept aside for reference
# (see the commented-out select() below); this object is not used again in
# the visible script.
daddy_issues = df_rq1 %>%
  select(adadagetw, afasoc2, afahqual)

# Single stochastic imputation (m = 1) of baseline data via predictive mean
# matching. Performed at the family level (wide format, one row per family);
# the long-format reshape for twin-level models happens further down.
df_rq1_imputed = df_rq1 %>%
  # select(-adadagetw, -afasoc2, -afahqual) %>% # removing these eliminates imputation warnings
  mice(method = "pmm", m =1)

 iter imp variable
  1   1  amumagetw  adadagetw  asingle  amedtot  afasoc2  afahqual  amosoc2  amohqual  atwmed1  aethnicc  alang  atwclub  alookels  asmoke  adrink  astress  pollution1998pca
  2   1  amumagetw  adadagetw  asingle  amedtot  afasoc2  afahqual  amosoc2  amohqual  atwmed1  aethnicc  alang  atwclub  alookels  asmoke  adrink  astress  pollution1998pca
  3   1  amumagetw  adadagetw  asingle  amedtot  afasoc2  afahqual  amosoc2  amohqual  atwmed1  aethnicc  alang  atwclub  alookels  asmoke  adrink  astress  pollution1998pca
  4   1  amumagetw  adadagetw  asingle  amedtot  afasoc2  afahqual  amosoc2  amohqual  atwmed1  aethnicc  alang  atwclub  alookels  asmoke  adrink  astress  pollution1998pca
  5   1  amumagetw  adadagetw  asingle  amedtot  afasoc2  afahqual  amosoc2  amohqual  atwmed1  aethnicc  alang  atwclub  alookels  asmoke  adrink  astress  pollution1998pca
Code
df_rq1_imputed$loggedEvents
NULL
Code
# Extract the single completed (imputed) dataset from the mids object
df_rq1_imputed = complete(df_rq1_imputed)

# Outcome prefixes for time points at which cohorts 3 & 4 were not all
# eligible to take part (both twin-suffix columns match via starts_with)
vars_to_na = c("btwdata","ctwdata", "gcdata","icdata","jcdata","pcwebdata")

# Blank those outcomes for cohorts 3 & 4 so ineligible families are not
# counted as non-participants
df_rq1_imputed = df_rq1_imputed %>%
  mutate(
      across(starts_with(vars_to_na), ~ ifelse(cohort %in% c("Cohort 3: twins born Sep-95 to Aug-96","Cohort 4: twins born Sep-96 to Dec-96"), NA, .))
    )

# df_rq1_imputed$gcdata1 %>% table(., useNA = "always")

Create Long Imputed dataset for twin-level analyses

For imputation, because predictor variables are only measured at the family level, imputation is performed in the wide format with one row per family.

For modelling twin-level outcomes with glm(), we need to convert outcomes to a long format with one row per twin.

Code
# Family identifier. seq_len() is safe even for an empty data frame,
# whereas 1:nrow() would yield c(1, 0) when nrow == 0.
df_rq1_imputed$famid = seq_len(nrow(df_rq1_imputed))

# Stack twin-1 and twin-2 outcome columns into one row per twin; predictors
# are family-level, so they repeat across the pair
df1 = df_rq1_imputed[c("famid", rq1x, rq1y_twin1)]
df2 = df_rq1_imputed[c("famid", rq1x, rq1y_twin2)]

# Rename twin2 columns to match twin1 column names so rbind() aligns them
colnames(df2) = colnames(df1)

df_rq1_long = rbind(df1, df2) %>%
  arrange(famid)

rm(df1, df2)

# Sanity check: btwdata1 was blanked above for ineligible cohorts, so this
# table should reflect those NAs — confirm against cohort sizes
df_rq1_long$btwdata1 %>% table(., useNA = "always")
.
<NA> 
   0 
Code
df_rq1_long %>% sapply(., function(x) length(which(is.na(x))))
           famid             sex1        amumagetw        adadagetw          asingle            zygos          amedtot          afasoc2         afahqual          amosoc2         amohqual          atwmed1         aethnicc            alang         anoldsib         anyngsib          atwclub         alookels           asmoke           adrink          astress pollution1998pca         dtwdata1         lcwdata1         lcqdata1        pcbhdata1         rcqdata1         u1cdata1         zmhdata1          zcdata1 
               0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0 

Correlation of participation over time

Code
# Correlations between both twins' participation indicators, one row per
# family; columns relabelled before plotting
twin_participation = df %>%
  filter(twin == 1) %>%
  select(starts_with(rq1y_twin)) %>%
  select(matches("1$|2$"))

twin_participation %>%
  rename_with(.fn = ~ var_to_label(.x, twinsuffix = TRUE)) %>%
  rename_with(.fn = ~ gsub(", 1Y 0N", "", .x)) %>%
  gbtoolbox::plot_correlations(sample_size = FALSE, confidence_interval = FALSE)
Warning in gbtoolbox::plot_correlations(., sample_size = FALSE, confidence_interval = FALSE): This function is in development, and not yet ready for widespread use. 
  Proceed with caution

Code
# Family-level participation correlations across all time points.
# NOTE(review): the N in the subtitle is hard-coded — verify it still matches
# the data if the sample ever changes.
rq1y_wide = df %>%
  filter(twin == 1) %>%
  select(all_of(rq1y)) %>%
  setNames(rq1y_labels)

gbtoolbox::plot_correlations(
  rq1y_wide,
  sample_size = FALSE,
  confidence_interval = FALSE
) +
  labs(
    title = "Correlation between participation at different \ntime points",
    subtitle = "N = 13020 (Number of families)"
  )
Warning in gbtoolbox::plot_correlations(., sample_size = FALSE, confidence_interval = FALSE): This function is in development, and not yet ready for widespread use. 
  Proceed with caution

Code
save_plot("1_correlation_participation", width = 9, height = 9)

List of predictor variables

Code
# Markdown table of baseline predictors and their labels
knitr::kable(cbind(rq1x, rq1x_labels))
rq1x rq1x_labels
sex1 Twin sex, 0female 1male
amumagetw Age in years of natural mother at time of birth of twins
adadagetw Age in years of natural father at time of birth of twins
asingle Single Parent
zygos Twin pair zygosity (best available estimate), 1MZ 2DZ
amedtot Mother medical risk factor composite scale (1st Contact), standardised
afasoc2 Father SOC employment level (1st Contact), 1-9
afahqual Male parent highest qualification level (1st Contact), see value labels
amosoc2 Mother SOC employment level (1st Contact), 1-9
amohqual Female parent highest qualification level (1st Contact), see value labels
atwmed1 Twin medical risk factor composite scale (1st Contact), standardised
aethnicc Ethnic origin of twins, original codes (1st Contact), see value labels
alang Main language spoken at home (1st Contact), see value labels
anoldsib Number of older siblings (1st Contact), see value labels
anyngsib Number of younger siblings (1st Contact), see value labels
atwclub Member of a Twins Club (1st Contact), 1Y 0N
alookels Twins looked after by anyone else (1st Contact), 1Y 0N
asmoke Smoked cigarettes while pregnant (1st Contact), 1Y 0N
adrink Drank alcohol while pregnant (1st Contact), 1Y 0N
astress Severe stress during pregnancy (1st Contact), 1Y 0N
pollution1998pca Principal Component of 1998 pollution variables

List of outcome variables

Code
# Markdown table of twin-level participation outcomes and their labels
knitr::kable(cbind(rq1y_twin1, rq1y_twin_labels))
rq1y_twin1 rq1y_twin_labels
dtwdata1 Twin booklet data present (4 Year), 1Y 0N
lcwdata1 Twin web data (at least one test) present at 12, 1Y 0N
lcqdata1 Child questionnaire data present at 12, 1Y 0N
pcbhdata1 Child behaviour booklet data present at 16, 1Y 0N
rcqdata1 Twin questionnaire data present at 18, 1Y 0N
u1cdata1 TEDS21 phase 1 twin qnr data present, 1Y 0N
zmhdata1 TEDS26 twin MHQ data present, 1Y 0N
zcdata1 CATSLife twin web study: data are present, 1Y 0N

Twin-Level Logistic Regressions to each participation outcome

Fit Models

Fit logistic regression models to each participation outcome

Code
set.seed(1)

# Preallocate so the list is not grown inside the loop
twinmodels = vector("list", length(rq1y_twin))

for (i in seq_along(rq1y_twin)){

  cat(i, "/", length(rq1y_twin),"\n")

  # reformulate() builds `outcome ~ x1 + x2 + ...` without string pasting.
  # The loop indexes rq1y_twin for length but rq1y_twin1 for the outcome
  # name: after the long reshape, both twins share the twin-1 column names.
  # (Renamed local from `formula`, which shadowed stats::formula.)
  model_formula = reformulate(termlabels = rq1x, response = rq1y_twin1[i])
  twinmodels[[i]] = glm(model_formula, data = df_rq1_long, family = binomial, na.action = na.omit)

}
1 / 8 
2 / 8 
3 / 8 
4 / 8 
5 / 8 
6 / 8 
7 / 8 
8 / 8 

Calculate predicted participation for each participant

This approach doesn’t give us standard errors for our predictions.

** Predictions for participants at cohorts 3-4 at specific timepoints are kept in here!! **

** For plotting purposes, the intercept is removed from predictions. **

Code
set.seed(1)

# Preallocate the prediction list (one element per outcome)
twinmodels_predictions = vector("list", length(rq1y_twin))

for (i in seq_along(rq1y_twin)){
  # Keep each model's analysis data alongside its predictions
  twinmodels_predictions[[i]] = twinmodels[[i]]$data

  # Linear-predictor-scale ("link") predictions for ALL rows, including
  # those excluded from the fit (na.action = na.pass keeps them as NA).
  # The intercept is subtracted so only the covariate contribution is
  # correlated/plotted across outcomes. (Also removes a fragile trailing
  # comma that was in the original predict() call.)
  twinmodels_predictions[[i]]$prediction = predict(
    twinmodels[[i]],
    newdata = df_rq1_long,
    type = "link",
    na.action = na.pass
  ) - twinmodels[[i]]$coefficients[1] # removed intercept term

}
names(twinmodels_predictions) = rq1y_twin

twinmodels_predictions = bind_rows(twinmodels_predictions, .id = "outcome")

# One prediction per family per outcome: families appear twice in the long
# data (once per twin) with family-level predictors, so duplicates are
# dropped within each outcome before widening
predictions_wide = twinmodels_predictions %>%
  select(famid, outcome, prediction) %>%
  mutate(outcome = rq1y_twin_labels_clean_extrashort[match(outcome, rq1y_twin)]) %>%
  group_by(outcome) %>%
  filter(!duplicated(famid)) %>%
  pivot_wider(
    values_from = "prediction",
    names_from  = "outcome",
    id_cols     = "famid"
  ) %>%
  data.frame()

predictions_wide %>%
  select(-famid) %>%
  gbtoolbox::plot_correlations(sample_size = FALSE) +
  labs(
    subtitle = "Correlation between predicted probability (type = link) across outcomes"
  )
Warning in gbtoolbox::plot_correlations(., sample_size = FALSE): This function is in development, and not yet ready for widespread use. 
  Proceed with caution

Code
save_plot("1_rq1_predicted_probability_plots", width = 8, height = 8)

Significance of predictor variables

A detailed version of the figure below can be found here.

Code
# plan(multicore, workers = ncores_use) # this won't work when rendering quarto docs
# Multisession backend (separate R processes) works inside Quarto rendering
plan(multisession, workers = ncores_use)

# Drop-one-variable model comparisons for each outcome, in parallel.
# glm_model_comparison_robust is a project helper — see its definition for
# the exact columns returned (Wald_p, Delta_AUC, AUC, mcfad_r2 are used
# below). future.seed = TRUE gives reproducible parallel RNG streams.
glm_model_comparison_results = future_lapply(
  twinmodels,
  glm_model_comparison_robust,
  future.seed = TRUE
)
Code
# Stack per-outcome comparison results into one data frame
glm_model_comparison_results_df = do.call("rbind.data.frame", glm_model_comparison_results)

# Human-readable predictor labels; "None" (full-model) rows get NA here and
# are dropped below
glm_model_comparison_results_df$Variables_full = rq1x_labels_clean[match(glm_model_comparison_results_df$Variables_Dropped, rq1x)]

# Adjust p-values and add star

glm_model_comparison_results_df = glm_model_comparison_results_df %>% 
  filter(Variables_Dropped!="None") %>%
  group_by(outcome) %>%
  # Holm correction applied within each outcome (one family of tests per model)
  mutate(Wald_p = stats::p.adjust(Wald_p, method = "holm")) %>% 
  ungroup() %>%
  mutate(
    Wald_p_star = as.character(symnum(Wald_p, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", " ")))
  )

# Order predictors by mean adjusted p-value across outcomes (most significant
# first); used as the factor order in the plots below
variable_importance_order = glm_model_comparison_results_df %>%
  group_by(Variables_full) %>%
  summarise(mean_score = mean(Wald_p)) %>%
  arrange(mean_score) %>%
  pull(Variables_full)

glm_model_comparison_results_df = glm_model_comparison_results_df %>%
  mutate(
    Variables_full = factor(Variables_full, levels = variable_importance_order),
    # Key used later to merge adjusted p-values into the coefficient table
    var_outcome = paste(Variables_Dropped, outcome, sep = "_")
  )

# Variable importance (drop-one ΔAUC) per outcome, coloured by Holm-adjusted
# significance
glm_model_comparison_results_df %>%
  mutate(
    outcome_labeled = rq1y_twin_labels_clean[match(.$outcome,rq1y_twin1)],
    outcome_labeled = factor(outcome_labeled, levels = rq1y_twin_labels_clean),
    criterion       = Delta_AUC,
    sig_level = case_when(
      Wald_p < 0.001 ~ "p < 0.001",
      Wald_p < 0.01 ~ "p < 0.01", 
      Wald_p < 0.05 ~ "p < 0.05",
      Wald_p < 0.1 ~ "p < 0.1",
      TRUE ~ "Not significant"
    ),
    sig_level = factor(sig_level, levels = c("p < 0.001", "p < 0.01", "p < 0.05", "p < 0.1", "Not significant"))
  ) %>%
  # Printed bar labels: strip the leading zero and append significance stars.
  # BUG FIX: all three tiers previously appended "***"; the star count now
  # follows convention and matches the symnum() coding above. Cutoffs also
  # now use "<" consistently, matching sig_level.
  mutate(
    criterion2 = gsub("0\\.", ".", sprintf("%.3f", criterion)),
    criterion2 = case_when(
      Wald_p < 0.001 ~ paste0(criterion2, "***"),
      Wald_p < 0.01  ~ paste0(criterion2, "**"),
      Wald_p < 0.05  ~ paste0(criterion2, "*"),
      TRUE           ~ criterion2
    ),
  ) %>%
  ggplot(aes(y = Variables_full, x = criterion)) + 
  geom_col(aes(fill = sig_level), alpha = 0.8) + 
  geom_text(aes(label = criterion2),
            size = 2.5, fontface = "bold", color = "black") +
  facet_wrap(~outcome_labeled, scales = "fixed", ncol = 6) +
  scale_x_continuous(labels = function(x) gsub("0\\.", ".", sprintf("%.3f", x))) +  # strip leading zeros on the axis
  scale_fill_manual(
    values = c("p < 0.001" = "#d73027", "p < 0.01" = "#fc8d59", 
               "p < 0.05" = "#fee08b", "p < 0.1" = "#e0f3f8", 
               "Not significant" = "#d9d9d9"),
    name = "Significance\n(Holm-adjusted)"
  ) +
  labs(
    x = "Change in AUC (when variable removed)",
    y = NULL,
    title = "Variable Importance by AUC Change",
    subtitle = "Colors show Holm-adjusted significance levels"
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 7),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    strip.text = element_text(size = 9, face = "bold"),
    legend.position = "bottom"
  )

Code
save_plot("1_rq1_variable_importance_twin-level", width = 14, height = 7)

Calc variable importance for plotting

Code
# Collapse drop-one results into a single importance ranking per predictor;
# the ranking orders the axes of the circular heatmap below
variable_importance = glm_model_comparison_results_df %>%
  filter(Variables_Dropped != "None") %>%
  group_by(Variables_full) %>%
  summarise(
    # Number of outcomes for which this predictor is significant (Holm-adjusted)
    n_significant_05 = sum(Wald_p < 0.05, na.rm = TRUE),
    # NOTE(review): Delta_AUC is negated (×-10000) and floored at 0 —
    # presumably dropping a useful variable makes Delta_AUC negative; confirm
    # the sign convention of glm_model_comparison_robust
    avg_delta_auc = mean(pmax(0, Delta_AUC * -10000), na.rm = TRUE),
    avg_p_value = mean(Wald_p, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(n_significant_05)) %>%
  mutate(importance_rank = row_number())

Circular Heatmap

Code
# Circular heatmap of drop-one |ΔAUC| by predictor × outcome.
# Trailing space keeps these labels distinct from the originals.
rq1y_twin_labels_clean_extrashort2 = paste0(rq1y_twin_labels_clean_extrashort, " ")

# Alternative circular heatmap approach
heatmap_data = glm_model_comparison_results_df %>%
  filter(Variables_Dropped != "None") %>%
  mutate(
    outcome_labeled = rq1y_twin_labels_clean_extrashort2[match(outcome, rq1y_twin1)],
    outcome_labeled = factor(outcome_labeled, levels = rq1y_twin_labels_clean_extrashort2),
    Variables_wrapped = str_wrap(Variables_full, width = 8),
    # Fill only significant cells with |ΔAUC|; non-significant cells become
    # NA and render white
    Delta_AUC_fill = case_when(
      Wald_p < 0.05 ~ abs(Delta_AUC),
      TRUE ~ NA
    )
  ) %>%
  # Join with importance ranking from above
  left_join(variable_importance, by = "Variables_full") %>%
  mutate(
    # Set factor levels based on importance ranking
    Variables_wrapped = factor(Variables_wrapped, 
                              levels = unique(Variables_wrapped[order(importance_rank)]))
  )

# Add empty variables to create gap
n_vars = length(levels(heatmap_data$Variables_wrapped))
all_vars = c(levels(heatmap_data$Variables_wrapped))

heatmap_data$Variables_wrapped = factor(heatmap_data$Variables_wrapped, levels = all_vars)

ggplot(heatmap_data, aes(x = Variables_wrapped, y = outcome_labeled)) +
  # FIX: tile border width now uses `linewidth`; the `size` aesthetic for
  # lines is deprecated since ggplot2 3.4.0 and raised the warning seen in
  # the rendered output
  geom_tile(aes(fill = Delta_AUC_fill, 
                color = ifelse(Variables_Dropped %in% c("astress", "atwmed1", "adrink", "anyngsib", "alookels", "afasoc2", "adadagetw", "alang"), "azure2", "black")), 
            linewidth = .5) +
  scale_color_identity() +
  # Cell labels: |ΔAUC| with leading zero stripped, significant cells only;
  # smaller text for the dense "Y4 " ring
  geom_text(
    aes(label = ifelse(Wald_p < 0.05, 
                       gsub("^(-?)0\\.", "\\1.", sprintf("%.3f", Delta_AUC)), ""),
        size = ifelse(outcome_labeled %in% c("Y4 "), 1.4, 2.8)
    ), 
    color = "white"
  ) +
  scale_size_identity() +
  # Add custom y-axis labels in the gap
  coord_polar(theta = "x", start = 0, direction = 1) +
  # scale_x_discrete(drop = FALSE, labels = function(x) ifelse(grepl("GAP_", x), "", x)) +  # Hide GAP labels
  scale_fill_gradient(
    na.value = "white",
    low = "#f48c84", 
    high = "#d73027",
    limits = c(0, max(heatmap_data$Delta_AUC_fill, na.rm = TRUE)),
    guide = "none"
  ) +
  labs(
    tag = "B",
    title = "Variable Importance Heatmap",
    subtitle = "Values indicate change in AUC when variable is removed from model\nRed cells are statistically significant"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 0, size = 11, colour = "black"),
    axis.text.y = element_blank(),  # Hide default y-axis labels
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.tag = element_text(hjust = 0, vjust = 0, size = 30, face = "bold"),
    plot.tag.position = "topleft",
    plot.title = element_text(hjust = 0.5, size = 16),
    plot.subtitle = element_text(hjust = 0.5, size = 13.5, margin = margin(b = 0))
  ) +
  # Outcome ring labels drawn manually near the circle centre (the default
  # y-axis text is hidden above)
  {
    y_positions = seq_along(levels(heatmap_data$outcome_labeled))
    labels = levels(heatmap_data$outcome_labeled)
    
    lapply(seq_along(labels), function(i) {
      annotate("text", x = 0.5, y = y_positions[i],
               label = labels[i], size = 2.8, hjust = 1, angle = 7)
    })
  }
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Code
save_plot("1_rq1_heatmap_twinparticipationpredictors", width = 8, height = 8, trim = TRUE)

AUC and Variance Explained Plots

Code
# Whole-model fit metrics per outcome: keep only the "no variables dropped"
# rows and discard the drop-one comparison columns.
# NOTE(review): this filters the RAW results on Variables_full == "None" —
# presumably glm_model_comparison_robust itself returns that column (the
# derived glm_model_comparison_results_df recodes it to labels); confirm.
model_metrics = do.call("rbind.data.frame", glm_model_comparison_results) %>%
  filter(Variables_full=="None") %>%
  select(-contains("Delta"),-contains("Wald")) %>%
  mutate(
    outcome = rq1y_twin_labels_clean_extrashort[match(.$outcome, rq1y_twin1)],
    outcome = factor(outcome, levels = rq1y_twin_labels_clean_extrashort)
  ) 

model_metrics
Code
# Back-to-back bar chart: AUC plotted leftwards (rescaled so 0.5 = chance
# maps to 0) and McFadden's R² rightwards (scaled for visual comparability)
plot_data = model_metrics %>%
  select(outcome, AUC, mcfad_r2) %>%
  mutate(
    # Negate so better-than-chance AUC extends left of the zero line
    AUC_scaled = -(AUC - 0.5),
    # Arbitrary visual scaling so R² bars are a comparable length
    R2_scaled = mcfad_r2 * 2.6
  ) %>%
  pivot_longer(cols = c(AUC_scaled, R2_scaled), names_to = "metric", values_to = "value_scaled") %>%
  mutate(
    metric_label = case_when(
      metric == "AUC_scaled" ~ "AUC",
      metric == "R2_scaled" ~ "McFadden's R²"
    ),
    # Labels show the UNscaled values
    original_value = ifelse(metric == "AUC_scaled", AUC, mcfad_r2),
    label_value    = case_when(
      metric == "AUC_scaled" ~ gbtoolbox::apa_num(original_value,3),
      metric == "R2_scaled" ~ paste0(gbtoolbox::apa_num(original_value*100,2),"%")
    )
  )

ggplot(plot_data, aes(y = outcome, x = value_scaled, fill = metric_label)) +
  geom_col(width = 0.7, alpha = 0.8) +
  # hjust places labels inside (AUC, left bars) or outside (R², right bars)
  geom_text(aes(label = label_value), 
            hjust = ifelse(plot_data$metric == "AUC_scaled", 1.1, -0.1),
            size = 2.9) +
  scale_fill_manual(values = c("AUC" = "#5e3c99", "McFadden's R²" = "#e66101")) +
  labs(
    tag = "A",
    title = "Participation Prediction Accuracy",
    # subtitle = "Accuracy in predicting \nparticipation at each Time Point",
    x = NULL,
    y = NULL,
    fill = NULL
  ) +
  theme_minimal() +
  coord_cartesian(clip = "off") +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(size = 10.5, angle = 0, margin = margin(r = 10, unit = "pt"), color = "black"),
    # panel.grid.major.x = element_line(color = "grey90"),
    panel.grid = element_blank(),
    plot.margin = margin(t = 20, r = 20, b = 50, l = 20, unit = "pt"),
    axis.text.x = element_blank(),
    plot.tag = element_text(hjust = 0, vjust = 0, size = 30, face = "bold"),
    plot.tag.position = "topleft",
    plot.title = element_text(hjust = 0.5, size = 16)
  ) +
  # FIX: `size` → `linewidth` for line geoms (deprecated in ggplot2 3.4.0)
  geom_vline(xintercept = 0, color = "black", linewidth = 0.5)

Code
save_plot("1_rq1_twinlevel_prediction_accuracy", width = 5, height = 8, trim = TRUE)

Twin-Level Logistic Regression Coefficients

Code
# Extract all coefficients from twin models and combine.
# Cluster-robust (CR2, bias-reduced linearization) variance estimates are
# used because the long data has two rows (twins) per family.
twinmodels_results0 = do.call(rbind, lapply(seq_along(twinmodels), function(i) {
    # CR2 sandwich covariance clustered on family id
    vcov_robust = clubSandwich::vcovCR(twinmodels[[i]], 
                                    cluster = twinmodels[[i]]$data$famid, 
                                    type = "CR2") 
    # Robust CIs with the naive-tp small-sample test
    coefs       = clubSandwich::conf_int(twinmodels[[i]], vcov = vcov_robust, test = "naive-tp")

    coefs$outcome = rq1y_twin1[i] 
  return(coefs)
}))
Registered S3 method overwritten by 'clubSandwich':
  method    from    
  bread.mlm sandwich
Code
# Tidy the robust coefficient table: "Coef" column → "term", drop row names
twinmodels_results = twinmodels_results0 %>%
  data.frame() %>%
  dplyr::rename(term = "Coef") %>%
  `rownames<-`(NULL)

# Map each coefficient back to its source predictor: dummy terms such as
# "afahqualHNC" begin with the variable name "afahqual"
twinmodels_results$var = str_extract(twinmodels_results$term, paste0("^(", paste(rq1x, collapse = "|"), ")"))
# CONSISTENCY FIX: index via the renamed `term` column of the same data frame
# rather than reaching back into twinmodels_results0$Coef (identical rows,
# clearer intent and no coupling to the pre-rename object)
twinmodels_results$var[twinmodels_results$term == "(Intercept)"] = "Intercept"
twinmodels_results$var_outcome = paste(twinmodels_results$var,twinmodels_results$outcome, sep = "_")
# Bonferroni-Holm corrected p-values merged from the drop-one Wald tests.
# Intercept rows have no matching test, so their Wald_p is NA (renders as a
# blank "p=" in the table below).
twinmodels_results$Wald_p = glm_model_comparison_results_df$Wald_p[match(twinmodels_results$var_outcome,glm_model_comparison_results_df$var_outcome)]

# Full coefficient table: one column per outcome, each cell shows
# beta [CI] p SE. Cells with Holm-adjusted p < .05 are shaded green by the
# per-outcome tab_style() calls below (commented-out blocks correspond to
# outcomes not in the current rq1y_twin set).
twinmodels_results %>%
    mutate(
      outcome = rq1y_twin_labels_clean_extrashort[match(.$outcome, rq1y_twin1)],
      # Recode raw glm dummy-term names into readable labels; specific string
      # matches must precede the generic str_starts() patterns
      term = case_when(
        term == "(Intercept)" ~ "Intercept",
        term == "sex1" ~ "Twin Sex (Male)",
        term == "amumagetw" ~ "Mother age at birth",
        term == "adadagetw" ~ "Father age at birth",
        term == "aadultscohabiting biological parent with other" ~ "Household Type: Cohabiting parent",
        term == "asinglesingle parent" ~ "Single Parent (ref: cohabiting parents)",
        term == "zygos2" ~ "Zygosity (DZ)",
        term == "amedtot" ~ "Mother medical risk",
        str_starts(term, "afasoc2") & str_detect(term, "^afasoc2\\d$") ~ paste0("Father employment: Level ", str_sub(term, -1)),
        term == "afasoc2caring for children at home" ~ "Father employment: Caring for children",
        term == "afasoc2no job" ~ "Father employment: No job",
        term == "afahqualno qualifications" ~ "Father education: No qualifications",
        term == "afahqualCSE grade 2-5 or O-level/GCSE grade D-G" ~ "Father education: CSE/GCSE D-G",
        term == "afahqualA-level or S-level" ~ "Father education: A-level",
        term == "afahqualHNC" ~ "Father education: HNC",
        term == "afahqualHND" ~ "Father education: HND",
        term == "afahqualundergraduate degree" ~ "Father education: Undergraduate",
        term == "afahqualpostgraduate qualification" ~ "Father education: Postgraduate",
        str_starts(term, "amosoc2") & str_detect(term, "^amosoc2\\d$") ~ paste0("Mother employment: Level ", str_sub(term, -1)),
        term == "amosoc2no job" ~ "Mother employment: No job",
        term == "amohqualCSE grade 2-5 or O-level/GCSE grade D-G" ~ "Mother education: CSE/GCSE D-G",
        term == "amohqualCSE grade 1 or O-level/GCSE grade A-C" ~ "Mother education: CSE/GCSE A-C",
        term == "amohqualA-level or S-level" ~ "Mother education: A-level",
        term == "amohqualHNC" ~ "Mother education: HNC",
        term == "amohqualHND" ~ "Mother education: HND",
        term == "amohqualundergraduate degree" ~ "Mother education: Undergraduate",
        term == "amohqualpostgraduate qualification" ~ "Mother education: Postgraduate",
        term == "atwmed1" ~ "Twin medical risk",
        term == "aethniccAsian" ~ "Ethnic origin: Asian",
        term == "aethniccBlack" ~ "Ethnic origin: Black",
        term == "aethniccMixed race" ~ "Ethnic origin: Mixed race",
        term == "aethniccOther" ~ "Ethnic origin: Other",
        term == "alangEnglish" ~ "Language: English only",
        term == "alangEnglish + other" ~ "Language: English + other",
        str_starts(term, "anoldsib") & str_detect(term, "\\d+$") ~ paste0("Older siblings: ", str_extract(term, "\\d+")),
        term == "anoldsib5 or more" ~ "Older siblings: 5+",
        term == "anyngsib1" ~ "Younger siblings: 1",
        term == "anyngsib2 or more" ~ "Younger siblings: 2+",
        term == "atwclub1" ~ "Twins club member",
        term == "alookels1" ~ "Childcare by others",
        term == "asmoke1" ~ "Smoking in pregnancy",
        term == "adrink1" ~ "Alcohol in pregnancy",
        term == "astress1" ~ "Severe stress in pregnancy",
        term == "cens01pop98density" ~ "Population density",
        term == "pollution1998pca" ~ "Pollution index",
        TRUE ~ term
      )
    ) %>%
  mutate(
    beta = gbtoolbox::apa_num(beta, n_decimal_places = 3),
    CI_L = gbtoolbox::apa_num(CI_L, n_decimal_places = 3),
    CI_U = gbtoolbox::apa_num(CI_U, n_decimal_places = 3),
    SE   = gbtoolbox::apa_num(SE,   n_decimal_places = 3),
    Wald_p_clean = gbtoolbox::apa_num(Wald_p, n_decimal_places = 3),

    # FIX: was `SE = SE`, a named argument that paste0() silently absorbs
    # into `...` while ignoring the name — identical output, clearer code
    table_val = paste0(beta,"\n[",CI_L,",\n",CI_U,"]\np=",Wald_p_clean,"\nSE=", SE)
  ) %>%
  select(term, table_val, Wald_p, outcome) %>%
  # One cell column plus one hidden p-value column per outcome
  pivot_wider(
    id_cols = term,
    values_from = c("table_val", "Wald_p"),
    names_from = outcome
  ) %>%
  gt() %>%
  tab_style(
    style = cell_text(whitespace = "pre-wrap"),
    locations = cells_body(columns = starts_with("table_val"))
  ) %>%
  # tab_style(
  #   style = cell_fill(color = "#d5e8d4"),
  #   locations = cells_body(columns = "table_val_Y2", rows = Wald_p_Y2 < 0.05)
  # ) %>%
  # tab_style(
  #   style = cell_fill(color = "#d5e8d4"),
  #   locations = cells_body(columns = "table_val_Y3", rows = Wald_p_Y3 < 0.05)
  # ) %>%
  tab_style(
    style = cell_fill(color = "#d5e8d4"),
    locations = cells_body(columns = "table_val_Y4", rows = Wald_p_Y4 < 0.05)
  ) %>%
  # tab_style(
  #   style = cell_fill(color = "#d5e8d4"),
  #   locations = cells_body(columns = "table_val_Y7", rows = Wald_p_Y7 < 0.05)
  # ) %>%
  # tab_style(
  #   style = cell_fill(color = "#d5e8d4"),
  #   locations = cells_body(columns = "table_val_Y9", rows = Wald_p_Y9 < 0.05)
  # ) %>%
  # tab_style(
  #   style = cell_fill(color = "#d5e8d4"),
  #   locations = cells_body(columns = "table_val_Y10", rows = Wald_p_Y10 < 0.05)
  # ) %>%
  tab_style(
    style = cell_fill(color = "#d5e8d4"),
    locations = cells_body(columns = "table_val_Y12 (web test)", rows = `Wald_p_Y12 (web test)` < 0.05)
  ) %>%
  tab_style(
    style = cell_fill(color = "#d5e8d4"),
    locations = cells_body(columns = "table_val_Y12 (q'aire)", rows = `Wald_p_Y12 (q'aire)` < 0.05)
  ) %>%
  # tab_style(
  #   style = cell_fill(color = "#d5e8d4"),
  #   locations = cells_body(columns = "table_val_Y16 (web)", rows = `Wald_p_Y16 (web)` < 0.05)
  # ) %>%
  tab_style(
    style = cell_fill(color = "#d5e8d4"),
    locations = cells_body(columns = "table_val_Y16 (q'aire)", rows = `Wald_p_Y16 (q'aire)` < 0.05)
  ) %>%
  tab_style(
    style = cell_fill(color = "#d5e8d4"),
    locations = cells_body(columns = "table_val_Y18", rows = Wald_p_Y18 < 0.05)
  ) %>%
  tab_style(
    style = cell_fill(color = "#d5e8d4"),
    locations = cells_body(columns = "table_val_Y21", rows = Wald_p_Y21 < 0.05)
  ) %>%
  tab_style(
    style = cell_fill(color = "#d5e8d4"),
    locations = cells_body(columns = "table_val_Y26 (q'aire)", rows = `Wald_p_Y26 (q'aire)` < 0.05)
  ) %>%
  tab_style(
    style = cell_fill(color = "#d5e8d4"),
    locations = cells_body(columns = "table_val_Y26 (web test)", rows = `Wald_p_Y26 (web test)` < 0.05)
  ) %>%
  # The p-value columns drove the shading above but are not displayed
  cols_hide(columns = starts_with("Wald_p")) %>%
  cols_label_with(
    fn = function(x) str_remove(x, "table_val_")
  ) %>%
  tab_options(
      table.width = "100%",
      table.font.size = "12px"
    ) %>%
      cols_width(
      term ~ px(90),
      starts_with("table_val") ~ px(40)
    ) %>%
  tab_source_note(
    source_note = "Note: Confidence intervals calculated using cluster robust standard errors clustered by family, calculated using the clubSandwich R package with CR2 (bias-reduced linearization) adjustment and naive-tp small sample adjustment. Cluster robust Wald tests were used to calculate using the ClubSandwich R package. Note that for dummy variables, Wald tests were performed by constraining all dummy variables in a given set to zero - therefore dummy variables derived from the same categorical variable have the same p-values."
  ) 
term Y4 Y12 (web test) Y12 (q'aire) Y16 (q'aire) Y18 Y21 Y26 (q'aire) Y26 (web test)
Intercept -1.193 [-1.642, -.745] p= SE= .229 -1.688 [-2.129, -1.247] p= SE= .225 -1.631 [-2.079, -1.183] p= SE= .229 -1.658 [-2.119, -1.197] p= SE= .235 -1.495 [-1.946, -1.044] p= SE= .230 -1.363 [-1.794, -.933] p= SE= .220 -1.207 [-1.620, -.794] p= SE= .211 -2.256 [-2.787, -1.724] p= SE= .271
Twin Sex (Male) -.109 [-.182, -.036] p= .042 SE= .037 -.231 [-.302, -.160] p= .000 SE= .036 -.182 [-.254, -.110] p= .000 SE= .037 -.393 [-.467, -.319] p= .000 SE= .038 -.253 [-.326, -.181] p= .000 SE= .037 -.682 [-.752, -.613] p= .000 SE= .035 -.739 [-.806, -.672] p= .000 SE= .034 -.688 [-.776, -.601] p= .000 SE= .045
Mother age at birth .035 [ .025, .046] p= .000 SE= .005 .027 [ .017, .037] p= .000 SE= .005 .028 [ .018, .039] p= .000 SE= .005 .031 [ .021, .042] p= .000 SE= .005 .047 [ .037, .058] p= .000 SE= .005 .014 [ .004, .024] p= .050 SE= .005 .014 [ .004, .023] p= .073 SE= .005 .012 [-.000, .025] p= .471 SE= .006
Father age at birth .010 [ .002, .019] p= .113 SE= .004 .005 [-.003, .013] p= .735 SE= .004 .004 [-.004, .012] p=1.000 SE= .004 -.001 [-.009, .007] p=1.000 SE= .004 .005 [-.003, .013] p=1.000 SE= .004 .009 [ .002, .017] p= .168 SE= .004 .007 [-.001, .014] p= .795 SE= .004 .008 [-.002, .017] p= .857 SE= .005
Single Parent (ref: cohabiting parents) -.412 [-.571, -.253] p= .000 SE= .081 -.415 [-.580, -.249] p= .000 SE= .084 -.351 [-.516, -.185] p= .001 SE= .085 -.554 [-.733, -.376] p= .000 SE= .091 -.447 [-.609, -.285] p= .000 SE= .083 -.607 [-.773, -.441] p= .000 SE= .085 -.455 [-.614, -.297] p= .000 SE= .081 -.475 [-.694, -.255] p= .000 SE= .112
Zygosity (DZ) -.160 [-.239, -.081] p= .001 SE= .040 -.343 [-.419, -.266] p= .000 SE= .039 -.323 [-.401, -.246] p= .000 SE= .040 -.286 [-.366, -.207] p= .000 SE= .040 -.311 [-.390, -.233] p= .000 SE= .040 -.290 [-.364, -.215] p= .000 SE= .038 -.271 [-.343, -.199] p= .000 SE= .037 -.329 [-.421, -.238] p= .000 SE= .047
Mother medical risk -.025 [-.070, .021] p=1.000 SE= .023 -.043 [-.087, .001] p= .395 SE= .023 -.021 [-.066, .024] p=1.000 SE= .023 -.054 [-.101, -.008] p= .206 SE= .024 -.082 [-.127, -.038] p= .003 SE= .023 -.021 [-.063, .022] p=1.000 SE= .022 -.030 [-.072, .011] p= .924 SE= .021 -.034 [-.088, .019] p=1.000 SE= .027
Father employment: Level 2 .138 [-.002, .278] p= .923 SE= .071 .092 [-.040, .223] p= .444 SE= .067 .045 [-.088, .178] p=1.000 SE= .068 .105 [-.028, .239] p= .927 SE= .068 .138 [ .000, .275] p= .052 SE= .070 .125 [-.001, .252] p= .378 SE= .065 .113 [-.008, .233] p= .795 SE= .061 .148 [ .002, .295] p=1.000 SE= .075
Father employment: Level 3 -.078 [-.227, .071] p= .923 SE= .076 .187 [ .044, .331] p= .444 SE= .073 .101 [-.044, .246] p=1.000 SE= .074 .055 [-.094, .203] p= .927 SE= .076 .067 [-.081, .215] p= .052 SE= .076 .079 [-.060, .218] p= .378 SE= .071 .055 [-.080, .189] p= .795 SE= .069 .111 [-.054, .276] p=1.000 SE= .084
Father employment: Level 4 .037 [-.164, .238] p= .923 SE= .102 .245 [ .049, .442] p= .444 SE= .100 .178 [-.021, .377] p=1.000 SE= .101 .160 [-.044, .364] p= .927 SE= .104 .046 [-.154, .246] p= .052 SE= .102 .115 [-.077, .306] p= .378 SE= .098 .119 [-.068, .305] p= .795 SE= .095 .157 [-.081, .395] p=1.000 SE= .121
Father employment: Level 5 -.048 [-.166, .071] p= .923 SE= .060 .009 [-.107, .124] p= .444 SE= .059 -.020 [-.136, .097] p=1.000 SE= .059 -.097 [-.217, .024] p= .927 SE= .061 -.104 [-.221, .013] p= .052 SE= .060 -.114 [-.226, -.001] p= .378 SE= .057 -.079 [-.189, .030] p= .795 SE= .056 -.040 [-.184, .104] p=1.000 SE= .074
Father employment: Level 6 -.097 [-.273, .078] p= .923 SE= .090 .134 [-.036, .303] p= .444 SE= .087 .101 [-.071, .273] p=1.000 SE= .088 .146 [-.029, .322] p= .927 SE= .090 .043 [-.129, .215] p= .052 SE= .088 -.031 [-.196, .135] p= .378 SE= .085 .062 [-.099, .222] p= .795 SE= .082 .113 [-.090, .317] p=1.000 SE= .104
Father employment: Level 7 -.230 [-.455, -.005] p= .923 SE= .115 .211 [-.012, .433] p= .444 SE= .113 .139 [-.089, .367] p=1.000 SE= .116 -.050 [-.286, .186] p= .927 SE= .120 .010 [-.212, .231] p= .052 SE= .113 -.077 [-.299, .144] p= .378 SE= .113 -.044 [-.248, .160] p= .795 SE= .104 .027 [-.239, .293] p=1.000 SE= .136
Father employment: Level 8 .022 [-.127, .172] p= .923 SE= .076 .032 [-.115, .180] p= .444 SE= .075 -.065 [-.215, .084] p=1.000 SE= .076 .012 [-.141, .166] p= .927 SE= .078 -.075 [-.223, .072] p= .052 SE= .075 -.044 [-.187, .099] p= .378 SE= .073 .006 [-.132, .143] p= .795 SE= .070 -.068 [-.259, .122] p=1.000 SE= .097
Father employment: Level 9 .075 [-.123, .273] p= .923 SE= .101 .134 [-.063, .330] p= .444 SE= .100 .126 [-.072, .324] p=1.000 SE= .101 .049 [-.158, .256] p= .927 SE= .106 .130 [-.066, .325] p= .052 SE= .100 -.051 [-.245, .142] p= .378 SE= .099 -.075 [-.263, .113] p= .795 SE= .096 .005 [-.248, .258] p=1.000 SE= .129
Father employment: Caring for children .010 [-.255, .276] p= .923 SE= .135 .016 [-.243, .275] p= .444 SE= .132 .026 [-.238, .289] p=1.000 SE= .135 .014 [-.262, .289] p= .927 SE= .140 -.202 [-.462, .058] p= .052 SE= .133 -.120 [-.384, .143] p= .378 SE= .134 -.128 [-.386, .130] p= .795 SE= .132 .026 [-.303, .355] p=1.000 SE= .168
Father employment: No job -.006 [-.182, .169] p= .923 SE= .090 -.000 [-.178, .177] p= .444 SE= .091 -.012 [-.192, .168] p=1.000 SE= .092 -.087 [-.274, .100] p= .927 SE= .096 -.247 [-.425, -.068] p= .052 SE= .091 -.185 [-.359, -.010] p= .378 SE= .089 -.177 [-.349, -.006] p= .795 SE= .087 -.043 [-.269, .183] p=1.000 SE= .116
Father education: No qualifications -.184 [-.315, -.053] p= .099 SE= .067 -.209 [-.341, -.076] p= .059 SE= .067 -.156 [-.291, -.022] p= .043 SE= .068 -.171 [-.311, -.030] p= .206 SE= .072 -.184 [-.315, -.054] p= .091 SE= .067 -.183 [-.313, -.054] p= .023 SE= .066 -.210 [-.337, -.083] p= .443 SE= .065 -.191 [-.366, -.017] p= .024 SE= .089
Father education: CSE/GCSE D-G -.100 [-.219, .018] p= .099 SE= .061 -.096 [-.214, .022] p= .059 SE= .060 -.064 [-.184, .056] p= .043 SE= .061 -.105 [-.229, .019] p= .206 SE= .063 -.078 [-.196, .040] p= .091 SE= .060 -.131 [-.247, -.014] p= .023 SE= .059 -.068 [-.180, .044] p= .443 SE= .057 -.043 [-.193, .108] p= .024 SE= .077
Father education: A-level -.153 [-.281, -.024] p= .099 SE= .066 -.136 [-.261, -.010] p= .059 SE= .064 -.115 [-.243, .012] p= .043 SE= .065 -.032 [-.164, .099] p= .206 SE= .067 -.070 [-.198, .057] p= .091 SE= .065 -.038 [-.162, .085] p= .023 SE= .063 .007 [-.112, .126] p= .443 SE= .061 .017 [-.138, .172] p= .024 SE= .079
Father education: HNC .060 [-.106, .227] p= .099 SE= .085 .013 [-.146, .173] p= .059 SE= .081 .093 [-.068, .255] p= .043 SE= .082 .076 [-.089, .240] p= .206 SE= .084 .113 [-.050, .276] p= .091 SE= .083 .002 [-.151, .156] p= .023 SE= .078 -.000 [-.150, .149] p= .443 SE= .076 .021 [-.173, .215] p= .024 SE= .099
Father education: HND -.120 [-.296, .057] p= .099 SE= .090 .049 [-.119, .217] p= .059 SE= .086 -.024 [-.195, .147] p= .043 SE= .087 .040 [-.133, .213] p= .206 SE= .088 -.074 [-.246, .098] p= .091 SE= .088 .021 [-.143, .185] p= .023 SE= .084 .015 [-.144, .174] p= .443 SE= .081 .071 [-.129, .271] p= .024 SE= .102
Father education: Undergraduate .036 [-.112, .185] p= .099 SE= .076 .099 [-.043, .240] p= .059 SE= .072 .168 [ .025, .311] p= .043 SE= .073 .136 [-.009, .282] p= .206 SE= .074 .094 [-.051, .240] p= .091 SE= .074 .164 [ .029, .300] p= .023 SE= .069 .064 [-.067, .194] p= .443 SE= .067 .268 [ .106, .429] p= .024 SE= .083
Father education: Postgraduate -.137 [-.311, .038] p= .099 SE= .089 -.003 [-.168, .163] p= .059 SE= .084 .053 [-.115, .222] p= .043 SE= .086 .144 [-.027, .315] p= .206 SE= .087 -.078 [-.250, .094] p= .091 SE= .088 .148 [-.011, .307] p= .023 SE= .081 .063 [-.089, .216] p= .443 SE= .078 .269 [ .079, .459] p= .024 SE= .097
Mother employment: Level 1 -.294 [-.458, -.130] p= .047 SE= .084 -.009 [-.164, .146] p= .000 SE= .079 .031 [-.127, .190] p= .001 SE= .081 -.042 [-.202, .117] p= .000 SE= .081 .120 [-.041, .281] p= .000 SE= .082 .029 [-.121, .179] p= .004 SE= .076 .028 [-.116, .172] p= .126 SE= .073 -.011 [-.192, .169] p=1.000 SE= .092
Mother employment: Level 2 -.096 [-.288, .095] p= .047 SE= .098 .177 [ .003, .352] p= .000 SE= .089 .071 [-.105, .247] p= .001 SE= .090 .042 [-.132, .217] p= .000 SE= .089 .193 [ .005, .381] p= .000 SE= .096 .079 [-.088, .245] p= .004 SE= .085 -.018 [-.174, .138] p= .126 SE= .080 -.094 [-.284, .095] p=1.000 SE= .097
Mother employment: Level 3 -.035 [-.196, .125] p= .047 SE= .082 .325 [ .176, .474] p= .000 SE= .076 .299 [ .146, .452] p= .001 SE= .078 .289 [ .137, .441] p= .000 SE= .078 .449 [ .289, .608] p= .000 SE= .081 .243 [ .103, .384] p= .004 SE= .072 .218 [ .084, .353] p= .126 SE= .069 .066 [-.101, .234] p=1.000 SE= .086
Mother employment: Level 4 -.075 [-.201, .052] p= .047 SE= .065 .039 [-.084, .163] p= .000 SE= .063 -.031 [-.155, .094] p= .001 SE= .064 -.033 [-.161, .095] p= .000 SE= .065 .083 [-.043, .208] p= .000 SE= .064 -.050 [-.172, .072] p= .004 SE= .062 -.020 [-.138, .098] p= .126 SE= .060 -.125 [-.280, .029] p=1.000 SE= .079
Mother employment: Level 5 -.153 [-.571, .265] p= .047 SE= .213 .188 [-.207, .583] p= .000 SE= .201 -.030 [-.429, .368] p= .001 SE= .203 .019 [-.412, .449] p= .000 SE= .220 .081 [-.338, .500] p= .000 SE= .214 .067 [-.313, .448] p= .004 SE= .194 .272 [-.076, .621] p= .126 SE= .178 .101 [-.349, .552] p=1.000 SE= .230
Mother employment: Level 6 -.141 [-.302, .020] p= .047 SE= .082 .118 [-.041, .277] p= .000 SE= .081 .065 [-.096, .226] p= .001 SE= .082 .015 [-.149, .179] p= .000 SE= .084 -.052 [-.212, .108] p= .000 SE= .082 -.079 [-.233, .074] p= .004 SE= .078 -.020 [-.171, .131] p= .126 SE= .077 -.188 [-.388, .012] p=1.000 SE= .102
Mother employment: Level 7 -.186 [-.366, -.007] p= .047 SE= .091 -.004 [-.181, .174] p= .000 SE= .091 -.088 [-.267, .092] p= .001 SE= .091 -.226 [-.415, -.037] p= .000 SE= .097 -.214 [-.393, -.036] p= .000 SE= .091 -.204 [-.381, -.026] p= .004 SE= .090 -.111 [-.283, .060] p= .126 SE= .088 -.074 [-.301, .154] p=1.000 SE= .116
Mother employment: Level 8 -.052 [-.415, .311] p= .047 SE= .185 -.047 [-.415, .320] p= .000 SE= .188 -.194 [-.571, .183] p= .001 SE= .192 -.334 [-.748, .080] p= .000 SE= .211 -.373 [-.732, -.014] p= .000 SE= .183 -.342 [-.727, .043] p= .004 SE= .196 -.083 [-.441, .276] p= .126 SE= .183 -.181 [-.665, .302] p=1.000 SE= .247
Mother employment: Level 9 .028 [-.225, .281] p= .047 SE= .129 .215 [-.037, .467] p= .000 SE= .129 .158 [-.099, .414] p= .001 SE= .131 -.084 [-.354, .187] p= .000 SE= .138 .041 [-.215, .296] p= .000 SE= .130 .066 [-.182, .314] p= .004 SE= .127 .085 [-.149, .319] p= .126 SE= .119 .016 [-.298, .330] p=1.000 SE= .160
Mother employment: No job -.206 [-.319, -.094] p= .047 SE= .057 -.172 [-.286, -.058] p= .000 SE= .058 -.212 [-.327, -.096] p= .001 SE= .059 -.279 [-.401, -.157] p= .000 SE= .062 -.189 [-.302, -.076] p= .000 SE= .057 -.163 [-.275, -.051] p= .004 SE= .057 -.122 [-.231, -.012] p= .126 SE= .056 -.157 [-.305, -.009] p=1.000 SE= .075
Mother education: CSE/GCSE D-G .301 [ .147, .454] p= .000 SE= .078 .211 [ .046, .376] p= .000 SE= .084 .206 [ .040, .372] p= .000 SE= .085 .198 [ .021, .375] p= .000 SE= .090 .292 [ .132, .452] p= .000 SE= .082 .242 [ .078, .406] p= .000 SE= .084 .140 [-.019, .300] p= .000 SE= .081 .018 [-.209, .244] p= .000 SE= .116
Mother education: CSE/GCSE A-C .438 [ .296, .580] p= .000 SE= .072 .499 [ .349, .650] p= .000 SE= .077 .428 [ .276, .579] p= .000 SE= .077 .422 [ .260, .584] p= .000 SE= .083 .546 [ .399, .693] p= .000 SE= .075 .468 [ .318, .618] p= .000 SE= .077 .342 [ .196, .489] p= .000 SE= .075 .300 [ .095, .505] p= .000 SE= .105
Mother education: A-level .463 [ .297, .629] p= .000 SE= .085 .647 [ .475, .819] p= .000 SE= .088 .592 [ .419, .765] p= .000 SE= .088 .612 [ .430, .794] p= .000 SE= .093 .690 [ .520, .859] p= .000 SE= .087 .588 [ .418, .757] p= .000 SE= .087 .524 [ .359, .689] p= .000 SE= .084 .521 [ .298, .744] p= .000 SE= .114
Mother education: HNC .704 [ .445, .963] p= .000 SE= .132 .717 [ .463, .972] p= .000 SE= .130 .587 [ .330, .844] p= .000 SE= .131 .540 [ .277, .803] p= .000 SE= .134 .731 [ .476, .986] p= .000 SE= .130 .612 [ .370, .855] p= .000 SE= .124 .503 [ .264, .742] p= .000 SE= .122 .416 [ .100, .731] p= .000 SE= .161
Mother education: HND .561 [ .329, .793] p= .000 SE= .118 .767 [ .539, .996] p= .000 SE= .117 .717 [ .484, .950] p= .000 SE= .119 .495 [ .255, .735] p= .000 SE= .123 .751 [ .520, .982] p= .000 SE= .118 .441 [ .216, .666] p= .000 SE= .115 .323 [ .108, .537] p= .000 SE= .109 .290 [ .003, .577] p= .000 SE= .146
Mother education: Undergraduate .765 [ .573, .957] p= .000 SE= .098 .829 [ .639, 1.019] p= .000 SE= .097 .766 [ .574, .958] p= .000 SE= .098 .741 [ .542, .941] p= .000 SE= .102 .887 [ .694, 1.079] p= .000 SE= .098 .752 [ .566, .938] p= .000 SE= .095 .692 [ .512, .871] p= .000 SE= .091 .502 [ .264, .740] p= .000 SE= .122
Mother education: Postgraduate .956 [ .717, 1.195] p= .000 SE= .122 .709 [ .483, .934] p= .000 SE= .115 .631 [ .403, .860] p= .000 SE= .117 .609 [ .374, .844] p= .000 SE= .120 .815 [ .584, 1.046] p= .000 SE= .118 .750 [ .531, .970] p= .000 SE= .112 .690 [ .479, .900] p= .000 SE= .107 .568 [ .298, .839] p= .000 SE= .138
Twin medical risk -.003 [-.044, .039] p=1.000 SE= .021 -.031 [-.071, .009] p= .664 SE= .021 -.041 [-.082, -.000] p= .381 SE= .021 -.022 [-.064, .019] p=1.000 SE= .021 -.013 [-.054, .028] p=1.000 SE= .021 -.008 [-.047, .031] p=1.000 SE= .020 .002 [-.036, .039] p=1.000 SE= .019 .052 [ .005, .100] p= .345 SE= .024
Ethnic origin: Asian .052 [-.258, .362] p= .031 SE= .158 .087 [-.227, .401] p= .005 SE= .160 .126 [-.193, .444] p= .042 SE= .163 .171 [-.151, .493] p= .018 SE= .164 .262 [-.056, .579] p= .004 SE= .162 .347 [ .053, .641] p= .000 SE= .150 .016 [-.266, .297] p= .001 SE= .144 .402 [ .078, .726] p= .002 SE= .165
Ethnic origin: Black -.524 [-.819, -.230] p= .031 SE= .150 -.453 [-.766, -.141] p= .005 SE= .159 -.509 [-.825, -.193] p= .042 SE= .161 -.580 [-.923, -.236] p= .018 SE= .175 -.311 [-.613, -.010] p= .004 SE= .154 -.523 [-.839, -.207] p= .000 SE= .161 -.529 [-.820, -.239] p= .001 SE= .148 -.864 [-1.366, -.361] p= .002 SE= .256
Ethnic origin: Mixed race -.139 [-.354, .077] p= .031 SE= .110 -.397 [-.621, -.174] p= .005 SE= .114 -.257 [-.481, -.033] p= .042 SE= .114 -.279 [-.513, -.044] p= .018 SE= .120 -.341 [-.563, -.119] p= .004 SE= .113 -.383 [-.603, -.163] p= .000 SE= .112 -.385 [-.598, -.173] p= .001 SE= .108 -.281 [-.556, -.006] p= .002 SE= .140
Ethnic origin: Other -.396 [-.843, .050] p= .031 SE= .228 -.125 [-.597, .347] p= .005 SE= .241 .065 [-.395, .526] p= .042 SE= .235 .027 [-.466, .520] p= .018 SE= .252 -.493 [-.989, .003] p= .004 SE= .253 -.159 [-.628, .310] p= .000 SE= .239 .140 [-.327, .608] p= .001 SE= .238 .224 [-.333, .780] p= .002 SE= .284
Language: English only .241 [-.064, .546] p= .324 SE= .156 .260 [-.041, .562] p= .059 SE= .154 .281 [-.028, .590] p= .077 SE= .158 .266 [-.049, .581] p=1.000 SE= .161 -.105 [-.415, .206] p=1.000 SE= .158 .259 [-.031, .548] p= .498 SE= .148 .078 [-.199, .355] p=1.000 SE= .141 .137 [-.214, .489] p= .857 SE= .179
Language: English + other -.112 [-.482, .258] p= .324 SE= .189 -.232 [-.615, .152] p= .059 SE= .196 -.199 [-.590, .193] p= .077 SE= .200 .040 [-.367, .447] p=1.000 SE= .207 -.074 [-.452, .303] p=1.000 SE= .193 -.021 [-.386, .344] p= .498 SE= .186 -.181 [-.536, .173] p=1.000 SE= .181 -.230 [-.691, .231] p= .857 SE= .235
Older siblings: 1 -.204 [-.289, -.118] p= .000 SE= .044 -.018 [-.101, .064] p= .003 SE= .042 -.053 [-.137, .030] p= .007 SE= .043 -.074 [-.159, .012] p= .014 SE= .044 -.132 [-.216, -.048] p= .000 SE= .043 -.093 [-.174, -.013] p= .005 SE= .041 -.112 [-.190, -.035] p= .061 SE= .040 -.117 [-.217, -.018] p= .125 SE= .051
Older siblings: 2 -.289 [-.407, -.171] p= .000 SE= .060 -.097 [-.212, .019] p= .003 SE= .059 -.165 [-.282, -.047] p= .007 SE= .060 -.176 [-.298, -.054] p= .014 SE= .062 -.295 [-.412, -.178] p= .000 SE= .060 -.179 [-.292, -.066] p= .005 SE= .058 -.196 [-.307, -.085] p= .061 SE= .057 -.107 [-.248, .035] p= .125 SE= .072
Older siblings: 3 -.564 [-.747, -.380] p= .000 SE= .094 -.223 [-.415, -.032] p= .003 SE= .098 -.257 [-.450, -.064] p= .007 SE= .098 -.303 [-.509, -.098] p= .014 SE= .105 -.414 [-.605, -.222] p= .000 SE= .098 -.282 [-.470, -.093] p= .005 SE= .096 -.173 [-.355, .008] p= .061 SE= .093 -.342 [-.598, -.087] p= .125 SE= .130
Older siblings: 4 -.525 [-.875, -.174] p= .000 SE= .179 -.673 [-1.068, -.277] p= .003 SE= .202 -.505 [-.882, -.128] p= .007 SE= .192 -.586 [-.997, -.174] p= .014 SE= .210 -.517 [-.886, -.147] p= .000 SE= .188 -.559 [-.931, -.188] p= .005 SE= .189 -.045 [-.398, .308] p= .061 SE= .180 -.278 [-.757, .202] p= .125 SE= .245
Older siblings: 5+ -1.061 [-1.652, -.470] p= .000 SE= .301 -1.074 [-1.792, -.356] p= .003 SE= .366 -.858 [-1.532, -.185] p= .007 SE= .344 -.404 [-1.071, .262] p= .014 SE= .340 -.990 [-1.637, -.343] p= .000 SE= .330 -.371 [-.951, .208] p= .005 SE= .296 -.387 [-.991, .217] p= .061 SE= .308 -1.221 [-2.308, -.135] p= .125 SE= .554
Younger siblings: 1 -.187 [-.398, .025] p= .936 SE= .108 -.200 [-.412, .012] p= .709 SE= .108 -.248 [-.466, -.029] p= .588 SE= .111 -.121 [-.341, .099] p=1.000 SE= .112 -.134 [-.347, .079] p=1.000 SE= .109 -.128 [-.336, .081] p=1.000 SE= .106 -.142 [-.346, .062] p= .900 SE= .104 -.037 [-.290, .215] p=1.000 SE= .129
Younger siblings: 2+ -.232 [-.949, .485] p= .936 SE= .366 -.073 [-.747, .600] p= .709 SE= .344 -.045 [-.762, .672] p= .588 SE= .366 -.232 [-.955, .491] p=1.000 SE= .369 -.086 [-.708, .536] p=1.000 SE= .317 -.443 [-1.083, .197] p=1.000 SE= .326 -.584 [-1.340, .172] p= .900 SE= .386 -1.089 [-2.313, .135] p=1.000 SE= .625
Twins club member .215 [ .133, .298] p= .000 SE= .042 .318 [ .240, .396] p= .000 SE= .040 .276 [ .197, .356] p= .000 SE= .040 .308 [ .228, .388] p= .000 SE= .041 .248 [ .168, .329] p= .000 SE= .041 .315 [ .239, .391] p= .000 SE= .039 .243 [ .170, .316] p= .000 SE= .037 .280 [ .189, .371] p= .000 SE= .047
Childcare by others -.136 [-.244, -.027] p= .114 SE= .055 -.038 [-.144, .068] p= .967 SE= .054 -.059 [-.166, .049] p=1.000 SE= .055 -.047 [-.158, .063] p=1.000 SE= .056 .130 [ .022, .239] p= .151 SE= .056 -.018 [-.121, .084] p=1.000 SE= .052 .020 [-.079, .120] p=1.000 SE= .051 .028 [-.099, .154] p=1.000 SE= .064
Smoking in pregnancy -.222 [-.323, -.120] p= .000 SE= .052 -.147 [-.250, -.044] p= .059 SE= .052 -.227 [-.332, -.123] p= .000 SE= .053 -.263 [-.373, -.153] p= .000 SE= .056 -.116 [-.218, -.014] p= .182 SE= .052 -.153 [-.254, -.052] p= .035 SE= .051 -.088 [-.186, .010] p= .795 SE= .050 -.169 [-.299, -.039] p= .132 SE= .066
Alcohol in pregnancy -.013 [-.099, .073] p=1.000 SE= .044 .087 [ .005, .170] p= .303 SE= .042 .064 [-.020, .147] p= .812 SE= .043 .007 [-.079, .092] p=1.000 SE= .044 .036 [-.048, .121] p=1.000 SE= .043 -.053 [-.134, .027] p=1.000 SE= .041 -.051 [-.128, .027] p=1.000 SE= .040 -.100 [-.199, -.001] p= .471 SE= .050
Severe stress in pregnancy -.018 [-.114, .077] p=1.000 SE= .049 -.023 [-.117, .071] p= .967 SE= .048 .014 [-.081, .110] p=1.000 SE= .049 .026 [-.072, .124] p=1.000 SE= .050 .005 [-.089, .099] p=1.000 SE= .048 .010 [-.082, .102] p=1.000 SE= .047 .007 [-.082, .096] p=1.000 SE= .045 -.029 [-.142, .085] p=1.000 SE= .058
Pollution index -.102 [-.141, -.064] p= .000 SE= .020 -.053 [-.091, -.016] p= .059 SE= .019 -.048 [-.086, -.010] p= .118 SE= .019 -.056 [-.095, -.017] p= .053 SE= .020 -.094 [-.132, -.056] p= .000 SE= .019 -.050 [-.086, -.013] p= .075 SE= .019 -.062 [-.097, -.027] p= .008 SE= .018 -.084 [-.129, -.039] p= .003 SE= .023
Note: Confidence intervals were calculated using cluster-robust standard errors clustered by family, computed with the clubSandwich R package using the CR2 (bias-reduced linearization) adjustment and the naive-tp small-sample adjustment. Cluster-robust Wald tests were also calculated using the clubSandwich R package. Note that for dummy variables, Wald tests were performed by constraining all dummy variables in a given set to zero; therefore dummy variables derived from the same categorical variable share the same p-value.

Plot Marginal Effects of maternal education

Code
# Calculate average predictions for maternal education variables

# Calculate predictions for all models (one fitted model per study wave)

all_predictions <- map_dfr(seq_along(twinmodels), function(i) {
  # Reference grid varying only maternal education (amohqual);
  # all other covariates are held at datagrid()'s defaults
  data_grid <- marginaleffects::datagrid(
    model = twinmodels[[i]],
    amohqual = unique
  )

  # Predicted probability of participation at each education level
  pred <- predict(twinmodels[[i]], newdata = data_grid, type = "response")

  cbind.data.frame(data_grid, estimate = pred) %>%
    mutate(
      outcome = rq1y_twin_labels_clean_extrashort[i],
      # Restore the original factor levels so plots keep category order
      amohqual = factor(amohqual, levels = levels(df$amohqual))
    )
})

# Plot all predictions together
plot_data <- all_predictions %>%
  mutate(
    # Fix legend/line ordering to the canonical wave order
    outcome = factor(outcome, levels = rq1y_twin_labels_clean_extrashort)
  )

ggplot(plot_data, aes(x = amohqual, y = estimate, group = outcome, color = outcome)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Predicted Participation by Maternal Education Level",
    subtitle = "Across all study timepoints",
    x = "Maternal Education Level",
    y = "Predicted Probability",
    color = "Study Wave",
    caption = "Note: Predictions calculated using logistic regression models fitted separately for each study wave.\nAll other covariates held at their observed means (continuous) or modes (categorical)."
  )

Descriptive Statistics

Total number of families in dataset: 13945

Total number of families with no predictor information (and are just completely removed from the analysis): 0

Participation rates at each timepoint

Code
# Participation rate per twin-1 outcome: N participating (== 1) over
# N eligible (non-missing, coded >= 0). NAs (e.g. cohorts not eligible
# at a given wave) are excluded from both numerator and denominator.
df %>%
  select(all_of(rq1y_twin1)) %>%
  vapply(
    function(x) sum(x == 1, na.rm = TRUE) / sum(x >= 0, na.rm = TRUE),
    numeric(1)
  )
 dtwdata1  lcwdata1  lcqdata1 pcbhdata1  rcqdata1  u1cdata1  zmhdata1   zcdata1 
0.6001152 0.4415899 0.4401306 0.3807988 0.5290323 0.3747312 0.3263057 0.1547235 
Code
# Raw 0/1 counts for each twin-1 outcome, as a kable table
df %>%
  select(all_of(rq1y_twin1)) %>%
  mutate(across(everything(), as.character)) %>%
  # rename_from_labels() could be applied here for readable names
  pivot_longer(cols = everything()) %>%
  count(name, value) %>%
  knitr::kable()
name value n
dtwdata1 0 10413
dtwdata1 1 15627
lcqdata1 0 14579
lcqdata1 1 11461
lcwdata1 0 14541
lcwdata1 1 11499
pcbhdata1 0 16124
pcbhdata1 1 9916
rcqdata1 0 12264
rcqdata1 1 13776
u1cdata1 0 16282
u1cdata1 1 9758
zcdata1 0 22011
zcdata1 1 4029
zmhdata1 0 17543
zmhdata1 1 8497
Code
# Bar charts of outcome values per wave, with missing values shown as
# an explicit "NA" category
df %>%
  select(all_of(rq1y_twin1)) %>%
  `colnames<-`(rq1y_twin_labels_clean_extrashort) %>%
  mutate(across(everything(), ~ replace_na(as.character(.), "NA"))) %>%
  pivot_longer(cols = everything()) %>%
  mutate(name = factor(name, levels = rq1y_twin_labels_clean_extrashort)) %>%
  ggplot(aes(x = value)) +
  geom_bar() +
  facet_wrap(~name, ncol = 4, scales = "free")

Predictor variables (rq1x)

Distribution

Code
# Bar charts for the categorical (non-numeric) predictors
df_rq1x %>%
  select(!where(is.numeric)) %>%
  rename_from_labels() %>%
  mutate(across(everything(), as.character)) %>%
  pivot_longer(cols = everything()) %>%
  ggplot(aes(x = value)) +
  geom_bar() +
  facet_wrap(~name, ncol = 4, scales = "free") +
  theme(
    axis.text.x = element_text(
      angle = 20,  # tilt labels so long category names fit
      hjust = 1,
      vjust = 1,
      size = 8
    )
  )

Missingness

Code
# Proportion of non-missing values for each predictor
df_rq1x %>%
  summarise(across(everything(), ~ mean(!is.na(.)))) %>%
  pivot_longer(
    cols = everything(),
    names_to = "name",
    values_to = "percent_notmissing"
  ) %>%
  arrange(name) %>%  # match group_by()'s alphabetical ordering
  knitr::kable()
name percent_notmissing
adadagetw 0.8824885
adrink 0.9900922
aethnicc 0.9963902
afahqual 0.8852535
afasoc2 0.8877112
alang 0.9860983
alookels 0.9520737
amedtot 0.9932412
amohqual 0.9842550
amosoc2 0.9850230
amumagetw 0.9816436
anoldsib 1.0000000
anyngsib 1.0000000
asingle 0.9827957
asmoke 0.9958525
astress 0.9942396
atwclub 0.9656682
atwmed1 0.9921659
pollution1998pca 0.8841782
sex1 1.0000000
zygos 1.0000000

Missing Data Patterns

Code
# Missingness pattern plot for the raw predictors, using readable labels
df_rq1x %>%
  `colnames<-`(rq1x_labels) %>%
  as_tibble() %>%
  ggmice::plot_pattern(rotate = TRUE)

Code
save_plot("1_rq1_misssing_data_pattern", width = 12, height = 30)

df_rq1_imputed %>%
  select(rq1x) %>%
  data.frame() %>%
  `colnames<-`((rq1x_labels)) %>%
  as_tibble() %>%
  ggmice::plot_pattern(., rotate = TRUE)
Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
ℹ Please use `all_of()` or `any_of()` instead.
  # Was:
  data %>% select(rq1x)

  # Now:
  data %>% select(all_of(rq1x))

See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
 /\     /\
{  `---'  }
{  O   O  }
==>  V <==  No need for mice. This data set is completely observed.
 \  \|/  /
  `-----'

Code
save_plot("1_rq1_misssing_data_pattern_imputed", width = 12, height = 30)

Predicting missingness of SDQ/MFQ data

Data Prep

1 = Not missing

Note that at Yr16, the emotion subtest was not administered to parents, so the conduct one is used instead.

Code
# SDQ emotional-problems availability indicators (1 = present) per wave;
# earlier waves are commented out because they are not analysed here
list_of_sdq_var <- c(
  # "bsdqcemot1",
  # "csdqcemot1",
  # "dsdqemot1",
  # "gpsdqemot1",
  # "icsdqemot1", "ipsdqemot1",
  "lcsdqemot1", "lpsdqemot1",
  "pcbhsdqemot1", "ppbhsdqcont1",
  "u1csdqemot1", "ucv1sdqemot1", "u1psdqemot1",
  "zmhsdqemot1"
)

# All SDQ/MFQ measures whose missingness is modelled (child + parent
# report). At Yr16 the parent emotion subtest was not administered, so
# the conduct subtest (ppbhsdqcont1) is used instead.
rq1m <- c(
  "lcsdqemot1", "lpsdqemot1", "lcmfqt1", "lpmfqt1",
  "pcbhsdqemot1", "ppbhsdqcont1", "pcbhmfqt1", "ppbhmfqt1",
  "u1csdqemot1", "ucv1sdqemot1", "u1psdqemot1", "u1cmfqt1", "ucv1mfqt1",

  "zmhsdqemot1", "zmhmfqt1"
)

# Child-reported subset of rq1m
rq1m_childmeasures <- c(
  "lcsdqemot1", "lcmfqt1",
  "pcbhsdqemot1", "pcbhmfqt1",
  "u1csdqemot1", "ucv1sdqemot1", "u1cmfqt1", "ucv1mfqt1",
  "zmhsdqemot1", "zmhmfqt1"
)

# Parent-reported subset of rq1m
rq1m_parentmeasures <- c(
  "lpsdqemot1", "lpmfqt1",
  "ppbhsdqcont1", "ppbhmfqt1",
  "u1psdqemot1"
)

# 0/1 matrix of outcome availability (1 = not missing), restricted to
# families contactable at first contact. Built with across() +
# as.matrix() rather than sapply() so the result type is predictable.
missing_outcomes <- df %>%
  filter(acontact == 1) %>%
  select(all_of(rq1m)) %>%
  mutate(across(everything(), ~ as.numeric(!is.na(.)))) %>%
  as.matrix()

# missing_outcomes = sapply(xx,function(x) as.numeric(is.na(x)))

Descriptives

Code
# Percentage of contactable families with each outcome present
# (columns of missing_outcomes are 0/1, so the column mean is the
# proportion present)
data.frame(
  var = rq1m,
  labels = var_to_label(rq1m),
  p_notmissing = round(colMeans(missing_outcomes) * 100)
)
Code
# Check how concordant missingness is between twins

df %>%
  filter(acontact == 1) %>%
  select("randomfamid", "twin", all_of(rq1m)) %>%
  # all_of() avoids the tidyselect "external vector" deprecation warning
  pivot_wider(id_cols = randomfamid, values_from = all_of(rq1m), names_from = twin) %>%
  mutate(
    across(
      ends_with("1") | ends_with("2"),
      ~ as.numeric(!is.na(.))
    )
  ) %>%
  select(-randomfamid) %>%
  gbtoolbox::plot_correlations(
    confidence_interval = FALSE,
    sample_size = FALSE
  )
Warning in gbtoolbox::plot_correlations(., confidence_interval = FALSE, : This function is in development, and not yet ready for widespread use. 
  Proceed with caution
Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
ℹ Please use `all_of()` or `any_of()` instead.
  # Was:
  data %>% select(rq1m)

  # Now:
  data %>% select(all_of(rq1m))

See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.

Code
# Twin concordance of missingness, child-reported measures only
df %>%
  filter(acontact == 1) %>%
  select("randomfamid", "twin", all_of(rq1m_childmeasures)) %>%
  # all_of() avoids the tidyselect "external vector" deprecation warning
  pivot_wider(id_cols = randomfamid, values_from = all_of(rq1m_childmeasures), names_from = twin) %>%
  mutate(
    across(
      ends_with("1") | ends_with("2"),
      ~ as.numeric(!is.na(.))
    )
  ) %>%
  # Drop the family ID so it is not correlated as if it were a measure
  # (parity with the all-measures chunk earlier in this script)
  select(-randomfamid) %>%
  gbtoolbox::plot_correlations(
    confidence_interval = FALSE,
    sample_size = FALSE
  ) +
  labs(title = "Child Measures Only")
Warning in gbtoolbox::plot_correlations(., confidence_interval = FALSE, : This function is in development, and not yet ready for widespread use. 
  Proceed with caution
Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
ℹ Please use `all_of()` or `any_of()` instead.
  # Was:
  data %>% select(rq1m_childmeasures)

  # Now:
  data %>% select(all_of(rq1m_childmeasures))

See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.

Code